# Toy parallel corpus: each entry is an (input sentence, target sentence) pair.
training_data = [("你因错过太阳而流泪", "你也将错过群星"), ("夜幕降临", "群星闪烁")]

# Build the vocabulary.
# Iterating a str yields single characters, so despite the name `word` every
# vocabulary entry is one character. Ids are handed out in order of first
# appearance, scanning each pair's input sentence and then its target sentence.
vocab = {}
for input_text, target_text in training_data:
    for word in input_text + target_text:
        # len(vocab) is always the next unused id; setdefault leaves an
        # already-registered character untouched.
        vocab.setdefault(word, len(vocab))

# Next unused id (equal to the vocabulary size); kept as a module-level name
# because the original script exposed it.
index = len(vocab)

# Encode the training data: every sentence becomes a list of character ids,
# keeping the (input, target) pairing.
encoded_training_data = [
    ([vocab[word] for word in input_text], [vocab[word] for word in target_text])
    for input_text, target_text in training_data
]

print("Vocabulary:", vocab)
print("Encoded Training Data:", encoded_training_data)



if __name__ == '__main__':
    print("over")